# -*- coding: utf-8 -*-
import numpy, os, json, h5py, pickle
import numpy as np
import argparse


def norm2(feats=None):
    """L2-normalise a 3-D array along its last axis (axis=2).

    The file's comments describe the typical input as (15, N, 2048), i.e.
    one unit-length feature vector per (candidate, row) pair on output.

    Unlike the previous implementation this returns a NEW array: the caller's
    buffer is never modified. Integer input is promoted to float64 instead of
    failing on in-place true division, and all-zero vectors are returned as
    zeros instead of NaN.

    Args:
        feats: array-like with at least three dimensions.

    Returns:
        numpy.ndarray of the same shape with unit L2 norm along axis 2
        (zero vectors stay zero).
    """
    arr = numpy.asarray(feats)
    if not numpy.issubdtype(arr.dtype, numpy.inexact):
        # True division needs a floating dtype.
        arr = arr.astype(numpy.float64)
    norm = numpy.sqrt(numpy.sum(arr**2, axis=2, keepdims=True))
    # Dividing a zero vector by 1 keeps it zero and avoids 0/0 -> NaN.
    norm[norm == 0] = 1
    return arr / norm


def norm2_2d(feats=None):
    """L2-normalise every row of a 2-D array (norm taken along axis=1).

    The file's comments describe the typical input as (N, 2048).

    Unlike the previous implementation this returns a NEW array, so passing a
    view of a larger tensor (e.g. ``all_feats[i]``) no longer overwrites that
    tensor. Integer input is promoted to float64, and all-zero rows are
    returned as zeros instead of NaN.

    Args:
        feats: array-like with at least two dimensions.

    Returns:
        numpy.ndarray of the same shape whose rows have unit L2 norm
        (zero rows stay zero).
    """
    arr = numpy.asarray(feats)
    if not numpy.issubdtype(arr.dtype, numpy.inexact):
        # True division needs a floating dtype.
        arr = arr.astype(numpy.float64)
    norm = numpy.sqrt(numpy.sum(arr**2, axis=1, keepdims=True))
    # Dividing a zero row by 1 keeps it zero and avoids 0/0 -> NaN.
    norm[norm == 0] = 1
    return arr / norm


def np_softmax_1d(scores, T=1.0):
    """Temperature-scaled softmax over ALL elements of ``scores``.

    The maximum is subtracted before exponentiation. This leaves the result
    mathematically unchanged but prevents ``exp`` from overflowing to inf
    (and the quotient from becoming NaN) for large scores or small ``T``.

    Args:
        scores: array-like of scores; the normalisation runs over every
            element (no axis argument), hence "1d".
        T: softmax temperature; scores are divided by it first.

    Returns:
        Array of the same shape as ``scores`` whose elements sum to 1.
        An empty input yields an empty array.
    """
    score_T = np.asarray(scores) / T
    if score_T.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return score_T
    exp_shifted = np.exp(score_T - np.max(score_T))
    return exp_shifted / np.sum(exp_shifted)


def show_score(sim_clip):
    """Print and return the base retrieval scores of a similarity matrix.

    Runs ``i2t`` and ``t2i`` on ``sim_clip``, prints one line with
    R@1/R@5/R@10/median rank/mean rank for both directions and one line with
    the sum of the six recall values.

    Returns:
        (summary_line, score_line, score, i2t_stats, t2i_stats) where the two
        stats entries are the 7-tuples returned by ``i2t`` and ``t2i``.
    """
    img2txt = i2t(sim_clip)
    txt2img = t2i(sim_clip)
    r1, r5, r10, medr, meanr, ranks, top1 = img2txt
    r1i, r5i, r10i, medri, meanri, ranks_t2i, top1i = txt2img

    summary_line = "\t Image to text: %.1f, %.1f, %.1f, %.1f, %.1f \t Text to image: %.1f, %.1f, %.1f, %.1f, %.1f" % (
        r1, r5, r10, medr, meanr, r1i, r5i, r10i, medri, meanri)
    total = r1 + r5 + r10 + r1i + r5i + r10i
    score_line = "\t currscore: %.1f" % (total)

    for line in (summary_line, score_line):
        print(line)

    i2t_stats = (r1, r5, r10, medr, meanr, ranks, top1)
    t2i_stats = (r1i, r5i, r10i, medri, meanri, ranks_t2i, top1i)
    return summary_line, score_line, total, i2t_stats, t2i_stats
    


def i2t(sim, return_ranks=True, n_caps=5):  ## from SAEM
    """Image-to-text retrieval metrics for a similarity matrix.

    Row ``i`` of ``sim`` scores image ``i`` against every caption; the
    ground-truth captions of image ``i`` are columns
    ``n_caps*i .. n_caps*i + n_caps - 1``. The rank of an image is the best
    (smallest, 0-based) position of any of its ground-truth captions in the
    descending-similarity ordering.

    Args:
        sim: 2-D array, one row per image and one column per caption.
        return_ranks: also return the per-image ranks and top-1 caption ids.
        n_caps: number of ground-truth captions per image (default 5, the
            value that was previously hard-coded).

    Returns:
        (r1, r5, r10, medr, meanr) and, if ``return_ranks``, additionally
        (ranks, top1). Recalls are percentages; medr/meanr are 1-based.

    Raises:
        IndexError: if a ground-truth caption column is missing from ``sim``.
        ZeroDivisionError: if ``sim`` has no rows.
    """
    npts = sim.shape[0]
    ranks = np.zeros(npts)
    top1 = np.zeros(npts)
    for index in range(npts):
        order = np.argsort(sim[index])[::-1]

        # Inverse permutation: position[c] is the rank of caption c. One pass
        # replaces a separate linear search per ground-truth caption.
        position = np.empty_like(order)
        position[order] = np.arange(order.size)

        gt_caps = np.arange(n_caps * index, n_caps * index + n_caps)
        ranks[index] = position[gt_caps].min()
        top1[index] = order[0]

    # Compute metrics
    r1 = 100.0 * np.count_nonzero(ranks < 1) / len(ranks)
    r5 = 100.0 * np.count_nonzero(ranks < 5) / len(ranks)
    r10 = 100.0 * np.count_nonzero(ranks < 10) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, medr, meanr, ranks, top1)
    return (r1, r5, r10, medr, meanr)


def t2i(sim, return_ranks=True, n_caps=5):
    """Text-to-image retrieval metrics for a similarity matrix.

    ``sim`` has one row per image and one column per caption; caption ``c``
    belongs to image ``c // n_caps``. For every caption the images are sorted
    by descending similarity and the 0-based position of its own image is the
    caption's rank.

    Args:
        sim: 2-D array, one row per image and one column per caption.
        return_ranks: also return the per-caption ranks and top-1 image ids.
        n_caps: number of captions per image (default 5, the value that was
            previously hard-coded).

    Returns:
        (r1, r5, r10, medr, meanr) and, if ``return_ranks``, additionally
        (ranks, top1). Recalls are percentages; medr/meanr are 1-based.

    Raises:
        IndexError: if ``sim`` has fewer than ``n_caps * n_images`` columns.
        ZeroDivisionError: if there are no captions to evaluate.
    """
    npts = sim.shape[0]
    n_queries = n_caps * npts

    ranks = np.zeros(n_queries)
    top1 = np.zeros(n_queries)

    # --> (captions, images): one row per text query
    sim_t = sim.T

    for cap in range(n_queries):
        order = np.argsort(sim_t[cap])[::-1]
        ranks[cap] = np.where(order == cap // n_caps)[0][0]
        top1[cap] = order[0]

    # Compute metrics
    r1 = 100.0 * np.count_nonzero(ranks < 1) / len(ranks)
    r5 = 100.0 * np.count_nonzero(ranks < 5) / len(ranks)
    r10 = 100.0 * np.count_nonzero(ranks < 10) / len(ranks)
    medr = np.floor(np.median(ranks)) + 1
    meanr = ranks.mean() + 1
    if return_ranks:
        return (r1, r5, r10, medr, meanr, ranks, top1)
    return (r1, r5, r10, medr, meanr)
    

## test only
class OPT(object):
    """Plain options container mirroring the command-line flags (test only).

    It can stand in for the ``argparse`` namespace when driving the
    evaluation functions programmatically. ``NN_scale`` and ``max_mean`` were
    previously missing although ``eval`` reads both from its ``opt``
    argument; they are appended after ``output`` with the same defaults as
    the command-line flags, so existing positional calls keep working.
    """

    def __init__(self,
                 vocab='./vocab_idx_word/vg_vocab.json',
                 p_feas='./p_feas_vlkb_word_idx_region_feat/p_feas.npy',
                 tags='./tags_NN/tags',
                 parses='./parses_JJ/parses',
                 img_feats='./bu_precomp_feats/f30k_test_buctxbox.h5',
                 # NOTE(review): the command-line default for --sims points
                 # into './base_sims/CLIP/'; confirm which path is intended.
                 sims='./base_sims/f30k_RN50x16 test embedding_sim.npy',
                 metric='cosine',
                 test_mode='t2i',
                 test_type='',
                 top_k=15,
                 JJ_scale=1.0,
                 t2i_scale=0.1,
                 i2t_scale=0.03,
                 T=1.0,
                 output='./output/date_xx_xx_xx_output',
                 NN_scale=1.0,
                 max_mean='max_mean',
                 ):
        # VLKB vocabulary and prototype features
        self.vocab = vocab
        self.p_feas = p_feas
        # dataset annotations and image region features
        self.tags = tags
        self.parses = parses
        self.img_feats = img_feats
        # base model similarity matrix and how to interpret it
        self.sims = sims
        self.metric = metric
        # evaluation protocol
        self.test_mode = test_mode
        self.test_type = test_type
        self.top_k = top_k
        # re-ranking hyper-parameters
        self.NN_scale = NN_scale
        self.JJ_scale = JJ_scale
        self.t2i_scale = t2i_scale
        self.i2t_scale = i2t_scale
        self.T = T
        self.max_mean = max_mean
        self.output = output
    

def get_NN_with_JJ_feats(idx, tags, word2idx, p_feas, parses, NN_scale=1.0, JJ_scale=1.0):
    """Build one prototype feature per in-vocabulary noun of sentence ``idx``.

    For every word of ``tags[idx]`` whose POS tag contains 'NN' and which is
    a key of ``word2idx``, the noun's prototype ``p_feas[word2idx[noun]]`` is
    combined with the prototype of the first adjective attached to it through
    an 'amod' relation in ``parses[idx]`` (dependent tag containing 'JJ',
    dependent word in the vocabulary):

        NN_scale * noun_feature + JJ_scale * adjective_feature

    Nouns without such an adjective contribute only the scaled noun feature.

    Fix: the "no noun" fallback used to sit inside the word loop, so a
    one-hot vector was prepended whenever the first word was not a noun and
    was never produced for an empty sentence. It now runs once after the
    loop, as the original comment describes.
    NOTE(review): this changes the features of most sentences, so numbers
    produced with the previous behaviour will not be reproduced exactly.

    Args:
        idx: sentence index into ``tags`` and ``parses``.
        tags: per-sentence lists of (word, pos_tag) pairs.
        word2idx: mapping word -> row index of ``p_feas``.
        p_feas: prototype feature table indexed by word index.
        parses: per-sentence lists of ((gov_word, gov_tag), relation,
            (dep_word, dep_tag)) triples.
        NN_scale: weight of the noun feature.
        JJ_scale: weight of the adjective feature.

    Returns:
        Non-empty list of 1-D feature vectors.
    """
    feas = []
    for word in tags[idx]:
        # Keep only nouns ('NN', 'NNS', ...) that exist in the vocabulary.
        if 'NN' not in word[1] or word[0] not in word2idx:
            continue
        o_fea = p_feas[word2idx[word[0]]]

        # First in-vocabulary adjective modifying this noun, if any.
        jj_fea = 0
        for row in parses[idx]:
            if word[0] == row[0][0] and 'amod' in row[1] and 'JJ' in row[2][1] and row[2][0] in word2idx:
                jj_fea = p_feas[word2idx[row[2][0]]]
                break

        feas.append(NN_scale * o_fea + JJ_scale * jj_fea)

    ## if no NN, then choose a one-hot vector instead
    if not feas:
        # Match the prototype dimensionality when it can be read off the
        # table; otherwise fall back to the previously hard-coded 2048.
        table_shape = numpy.shape(p_feas)
        feat_dim = table_shape[-1] if len(table_shape) >= 2 else 2048
        fallback = numpy.zeros(feat_dim)
        fallback[0] = 1.
        feas.append(fallback)

    return feas


##【8】 t2i & i2t evaluation
def eval(opt, test_mode, len_test, sim_clip, tags, word2idx, p_feas, parses, test_feas):
    """Re-rank the base model's top-k candidates with VLKB features; report R@1/5/10.

    NOTE: the name shadows the ``eval`` builtin inside this module; it is
    kept because callers use it.

    For each of the first ``len_test`` queries (captions for 't2i', images
    for 'i2t') the ``opt.top_k`` best candidates under ``sim_clip`` are
    taken. A second score is computed for each candidate from the noun and
    adjective prototype features of the caption (``get_NN_with_JJ_feats``)
    and the image's region features: cosine similarities between every
    prototype and every region, pooled with ``opt.max_mean``. Three
    orderings of the top-k are evaluated: base score only, VLKB score only,
    and ``base + scale * VLKB`` (``opt.t2i_scale`` / ``opt.i2t_scale``). If
    ``opt.metric == 'softmax'`` both scores are passed through
    ``np_softmax_1d`` with temperature ``opt.T`` before being combined.

    Changes relative to the previous implementation:
      * bare ``except:`` clauses replaced by an explicit "no hit" check;
      * unknown ``test_mode`` / ``opt.max_mean`` raise ValueError up front;
      * 'i2t': the image's regions are normalised once per query on a copy
        rather than once per candidate on a view of ``test_feas``, so
        ``test_feas`` is no longer modified in place;
      * 't2i': the query features are broadcast over the candidates rather
        than repeated ``k`` times, so fewer than ``k`` candidates works.

    Args:
        opt: options object; reads top_k, NN_scale, JJ_scale, max_mean,
            metric, T, t2i_scale and i2t_scale.
        test_mode: 't2i' or 'i2t'.
        len_test: number of queries to evaluate.
        sim_clip: base similarity matrix, indexed [image, caption]
            (e.g. (1000, 5000) for f30k per the file's comments).
        tags, word2idx, p_feas, parses: forwarded to ``get_NN_with_JJ_feats``.
        test_feas: per-image region features, indexed by image first
            (e.g. (1000, 36, 2048) per the file's comments).

    Returns:
        ``[[r1, r5, r10, r1+r5+r10]] * 3`` for the base, VLKB-only and
        combined orderings (in that order). Each row is also printed.

    Raises:
        ValueError: on an unknown ``test_mode`` or ``opt.max_mean``.
        ZeroDivisionError: if ``len_test`` is 0.
    """
    if test_mode not in ('t2i', 'i2t'):
        raise ValueError("test_mode must be 't2i' or 'i2t', got {!r}".format(test_mode))
    if opt.max_mean not in ('mean_mean', 'max_mean'):
        raise ValueError("opt.max_mean must be 'mean_mean' or 'max_mean', got {!r}".format(opt.max_mean))

    is_t2i = test_mode == 't2i'
    k = opt.top_k  ## rerank only cares about the top k
    caps_per_img = 5  # caption c belongs to image c // 5
    miss_rank = 100  # correct item not among the top k; any value >= 10 counts as a miss

    def _pool(region_sims):
        # (..., n_prototypes, n_regions) -> (...): pool regions, then average prototypes.
        if opt.max_mean == 'mean_mean':
            return region_sims.mean(-1).mean(-1)
        return region_sims.max(-1).mean(-1)

    def _first_hit(candidates, target_img):
        # 0-based position of the first candidate belonging to target_img.
        owners = candidates if is_t2i else candidates // caps_per_img
        hits = numpy.flatnonzero(owners == target_img)
        return hits[0] if hits.size else miss_rank

    def _recall_row(ranks):
        # Print and return [R@1, R@5, R@10, sum] for a list of 0-based ranks.
        ranks = numpy.array(ranks)
        r1 = 100.0 * len(numpy.where(ranks < 1)[0]) / len(ranks)
        r5 = 100.0 * len(numpy.where(ranks < 5)[0]) / len(ranks)
        r10 = 100.0 * len(numpy.where(ranks < 10)[0]) / len(ranks)
        print('{}, {}, {}, {}'.format(r1, r5, r10, r1 + r5 + r10))
        return [r1, r5, r10, r1 + r5 + r10]

    ranks_clip = []
    ranks_un = []
    ranks_comb = []

    for i in range(len_test):
        # Ground-truth image of this query and its row of base similarities.
        if is_t2i:
            index = i // caps_per_img
            sim_clip_i = sim_clip.T[i]
        else:
            index = i
            sim_clip_i = sim_clip[index]

        # Top-k candidates under the base model, best first.
        inds_clip_t5 = numpy.argsort(sim_clip_i)[::-1][0:k]
        sim_clip_i_t5 = sim_clip_i[inds_clip_t5]
        rank_clip_t5 = _first_hit(inds_clip_t5, index)
        ranks_clip.append(rank_clip_t5)

        # VLKB score of every candidate.
        if is_t2i:
            feas = get_NN_with_JJ_feats(i, tags, word2idx, p_feas, parses, NN_scale=opt.NN_scale, JJ_scale=opt.JJ_scale)
            query = numpy.array(feas)[numpy.newaxis, :, :]  ## (N, D) -> (1, N, D)
            # Indexing with an index array yields a copy, so normalising it
            # cannot touch test_feas.
            cand_regions = norm2(feats=test_feas[inds_clip_t5])
            # (1, N, D) @ (m, D, R) broadcasts to (m, N, R).
            sim_kb = _pool(numpy.matmul(norm2(feats=query), cand_regions.transpose(0, 2, 1)))
        else:
            # Normalise this image's regions once, on a private copy.
            img_regions = norm2_2d(feats=numpy.array(test_feas[index])).transpose(1, 0)
            per_caption = []
            for cap_idx in inds_clip_t5:
                feas = get_NN_with_JJ_feats(cap_idx, tags, word2idx, p_feas, parses, NN_scale=opt.NN_scale, JJ_scale=opt.JJ_scale)
                region_sims = numpy.matmul(norm2_2d(feats=numpy.array(feas)), img_regions)
                per_caption.append(_pool(region_sims))
            sim_kb = numpy.array(per_caption)

        # Ordering by the VLKB score alone.
        inds_i_t5 = inds_clip_t5[numpy.argsort(sim_kb)[::-1]]
        rank_i_t5 = _first_hit(inds_i_t5, index)
        ranks_un.append(rank_i_t5)

        # Ordering by base score + scaled VLKB score.
        if opt.metric == 'softmax':
            sim_clip_i_t5 = np_softmax_1d(sim_clip_i_t5, opt.T)
            sim_kb = np_softmax_1d(sim_kb, opt.T)
        scale = opt.t2i_scale if is_t2i else opt.i2t_scale
        sim_comb_i_t5 = sim_clip_i_t5 + scale * sim_kb
        inds_comb_i_t5 = inds_clip_t5[numpy.argsort(sim_comb_i_t5)[::-1]]
        rank_comb_i_t5 = _first_hit(inds_comb_i_t5, index)
        ranks_comb.append(rank_comb_i_t5)

        print(i, 'th: clip', rank_clip_t5, 'rank', rank_i_t5, 'comb', rank_comb_i_t5)

    # One summary row per ordering: base, VLKB only, combined.
    result = [_recall_row(ranks_clip), _recall_row(ranks_un), _recall_row(ranks_comb)]
    return result


def eval_once(opt, sim_clip, tags, word2idx, p_feas, parses, test_feas):
    """Run ``eval`` for the direction(s) selected by ``opt.test_mode``.

    ``sim_clip`` is indexed [image, caption]. Text-to-image evaluation uses
    one query per column and image-to-text evaluation one query per row.

    Args:
        opt: options object; ``opt.test_mode`` is 't2i', 'i2t' or 't2i+i2t'.
        sim_clip, tags, word2idx, p_feas, parses, test_feas: forwarded
            unchanged to ``eval``.

    Returns:
        The rows returned by ``eval``: three ``[r1, r5, r10, sum]`` rows for
        a single direction, or six for 't2i+i2t' (t2i rows first).

    Raises:
        ValueError: if ``opt.test_mode`` is not one of the supported values
            (previously this silently returned None).
    """
    img_num = sim_clip.shape[0]  ## f30k: 1000
    txt_num = sim_clip.shape[1]  ## f30k: 5000
    query_counts = {'t2i': txt_num, 'i2t': img_num}

    if opt.test_mode == 't2i+i2t':
        modes = ('t2i', 'i2t')
    elif opt.test_mode in query_counts:
        modes = (opt.test_mode,)
    else:
        raise ValueError(
            "test_mode must be 't2i', 'i2t' or 't2i+i2t', got {!r}".format(opt.test_mode))

    result = []
    for mode in modes:
        result += eval(opt, mode, query_counts[mode], sim_clip, tags, word2idx, p_feas, parses, test_feas)
    return result





def _print_result_rows(result):
    """Print one 'r1, r5, r10, r1+r5+r10' line per row of an eval_once result."""
    for itm in result:
        print('{}, {}, {}, {}'.format(itm[0], itm[1], itm[2], itm[3]))


def _require_image_count(sim, expected, test_type):
    """Raise ValueError unless ``sim`` has exactly ``expected`` image rows."""
    if sim.shape[0] != expected:
        raise ValueError('test_type {!r} expects {} images in the similarity matrix, got {}'.format(
            test_type, expected, sim.shape[0]))


if __name__ == '__main__':
    ## [1] hyper-parameters
    parser = argparse.ArgumentParser()
    ## VLKB dict/vocab
    parser.add_argument('--vocab', default='./vocab_idx_word/vg_vocab.json') ## (idx <-> word)
    parser.add_argument('--p_feas', default='./p_feas_vlkb_word_idx_region_feat/p_feas.npy') ## (word idx -> prototype region feature)
    ## dataset
    parser.add_argument('--tags', default='./tags_NN/tags') ## one-word annotation ('NN' is noun) by StanfordPOSTagger
    parser.add_argument('--parses', default='./parses_JJ/parses') ## two-word relation annotation ('JJ' is adjective) by StanfordDependencyParser
    parser.add_argument('--img_feats', default='./bu_precomp_feats/f30k_test_buctxbox.h5') ## precomputed bottom-up region features from SCAN[ECCV 18]/VSRN[ICCV 19]
    ## base model
    parser.add_argument('--sims', default='./base_sims/CLIP/f30k_RN50x16 test embedding_sim.npy') ## test similarity matrix, cosine similarity or probability score
    parser.add_argument('--metric', default='cosine') ## cosine metric for [CLIP, VSRN, SAEM, ALBEF(coarse-grained)]; softmax metric for [UNITER, OSCAR]
    ## test_mode
    parser.add_argument('--test_mode', default='t2i', choices=['t2i', 'i2t', 't2i+i2t'])
    parser.add_argument('--test_type', default='') ## ['', '1K', '5-fold-1K', '5K']
    parser.add_argument('--top_k', default=15, type=int) ## top k rerank
    ## hyper-parameter
    parser.add_argument('--NN_scale', default=1.0, type=float) ## when *NN* + *JJ*, scale factor of NN
    parser.add_argument('--JJ_scale', default=1.0, type=float) ## when *NN* + *JJ*, scale factor of JJ
    parser.add_argument('--t2i_scale', default=0.1, type=float) ## scale factor of reranked t2i sim matrix
    parser.add_argument('--i2t_scale', default=0.03, type=float) ## scale factor of reranked i2t sim matrix
    parser.add_argument('--T', default=1.0, type=float) ## temperature of softmax
    parser.add_argument('--max_mean', default='max_mean', choices=['max_mean', 'mean_mean']) ## pooling type
    ## else
    parser.add_argument('--output', default='./output/date_xx_xx_xx_output')
    opt = parser.parse_args()

    ## [2] vocab (idx <-> word)
    with open(opt.vocab, mode='r') as f: ## bidirectional vocab
        dicts = json.load(f)
    word2idx = dicts[0] ## dict: str      -> int
    idx2word = dicts[1] ## dict: str(int) -> str

    ## [3] vlkb (word idx -> region feature)
    p_feas = numpy.load(opt.p_feas) ## f30k setup: (27801, 2048)

    ## [4] img_feats (bu_precomp_feats); opened read-only so the file is never created or modified
    with h5py.File(opt.img_feats, 'r') as file:
        test_feas = file['ctx'][:] ## f30k: (1000, 36, 2048), 36 bottom-up region features per image

    ## [5] tags (*NN*): per sentence a list of (word, POS tag) pairs
    # NOTE: pickle executes arbitrary code on load; only use trusted files.
    with open(opt.tags, 'rb') as f1:
        tags = pickle.load(f1)

    ## [6] parses (JJ): per sentence a list of ((word, tag), relation, (word, tag)) triples
    # NOTE: pickle executes arbitrary code on load; only use trusted files.
    with open(opt.parses, 'rb') as f2:
        parses = pickle.load(f2)

    ## [7] sim matrix
    sim_clip = numpy.load(opt.sims) ## f30k: (1000, 5000); coco: (5000, 25000)

    if opt.test_type == '5-fold-1K':
        _require_image_count(sim_clip, 5000, opt.test_type)
        strs1 = []
        strs2 = []
        rslts = []

        for i in range(5): ## five disjoint 1K folds
            img_slice = slice(i * 1000, (i + 1) * 1000)
            txt_slice = slice(i * 5000, (i + 1) * 5000)
            fold_sim = sim_clip[img_slice, txt_slice]

            str1, str2 = show_score(fold_sim)[:2]
            strs1.append(str1)
            strs2.append(str2)
            rslts.append(eval_once(opt, fold_sim, tags[txt_slice], word2idx, p_feas, parses[txt_slice], test_feas[img_slice]))

        print(strs1)
        print(strs2)
        print('r1, r5, r10, r1+r5+r10')
        for i, result in enumerate(rslts):
            print("{} th 1K".format(i + 1))
            _print_result_rows(result)

        print('5-fold-1K: (sim, rank, comb)')
        print('r1, r5, r10, r1+r5+r10')
        # Average every metric over the folds; rows are (sim, rank, comb),
        # repeated for the second direction when test_mode is 't2i+i2t'.
        for row in range(len(rslts[0])):
            fold_means = [np.mean([fold[row][col] for fold in rslts]) for col in range(4)]
            print('{}, {}, {}, {}'.format(*fold_means))
    else:
        if opt.test_type == '1K':
            sim_clip = sim_clip[:1000, :5000] ## -> 1K
            _require_image_count(sim_clip, 1000, opt.test_type)
            tags, parses, test_feas = tags[:5000], parses[:5000], test_feas[:1000]
        elif opt.test_type == '5K':
            _require_image_count(sim_clip, 5000, opt.test_type)
            tags, parses, test_feas = tags[:25000], parses[:25000], test_feas[:5000]
        # any other test_type: evaluate everything as loaded

        show_score(sim_clip)
        result = eval_once(opt, sim_clip, tags, word2idx, p_feas, parses, test_feas)
        print('r1, r5, r10, r1+r5+r10')
        _print_result_rows(result)
    



